knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
# Load libraries
library(RColorBrewer)
library(dittoSeq)
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th152/training/subset"
# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
First, we read in the results from CISI pilot run for the chosen immune markers (CD15, CD20, CD3, CD38, CD4, CD68, CD8a, ICOS, Ki-67, MPO, panCK, SMA, CD303, FOXP3, GranzymeB). The pilot ran was done using no normalization and with k=1.
## Read results
# Read in all results for tonsil into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("pilot_immune_channels", results.files)]
results.df <- lapply(results.files, read_results, type="res", use_voi=FALSE) %>%
bind_rows()
## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("pilot_immune_channels", X.files)]
# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", use_voi=FALSE)
X.list <- lapply(X.list, function(sce.temp){
assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
sce.temp
})
## Read U
u.files <- list.files(analysis.path, "gene_modules.csv",
full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
u.files <- u.files[grepl("pilot_immune_channels", u.files)]
u <- lapply(u.files, read_U, type="training") %>%
bind_rows() %>%
dplyr::rename(dataset=rep) %>%
mutate(k="1")
## Read A
a.files <- list.files(analysis.path, "version_*",
full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
a.files <- a.files[grepl("pilot_immune_channels", u.files)]
a <- lapply(a.files, read_A, use_voi=FALSE) %>%
bind_rows() %>%
mutate(k="1")
# Read in masks for tonsil data
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))
For each different measurement used to test training results, a barplot is shown comparing CISI results.
# For each different measurement of training results, plot barplot
# Melt dataframe for plotting
data_temp <- results.df %>%
dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
pivot_longer(!c("simulation"), names_to="measure", values_to="value")
# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "measure")
print(results.barplot)
Correlation between ground truth and decomposed results per protein
for
asinh transformed counts.
aoi <- "exprs"
# Calculate correlations between ground truth and simulated data for each protein
X.cor <- lapply(X.list, function(sce){
counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
mutate(protein=rownames(.)) %>%
melt() %>%
dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
cell=variable) %>%
mutate(dataset=paste(metadata(sce)$training, metadata(sce)$dataset, sep="_"))
}) %>% bind_rows() %>%
group_by(protein, cell, dataset) %>%
summarise_all(na.omit) %>%
group_by(dataset, protein) %>%
mutate(correlation=cor(ground_truth, simulated))
# Plot correlations for k=1 for each test/training dataset combination
proteinCorr <- plot_protein_cor(X.cor) + ylim(0, 1)
print(proteinCorr)
Expression values for cells in test image for worst performing protein FOXP3.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "FOXP3"
img.foxp3 <- plot_cells(X.list, masks, poi)
# Plot decomposed vs true results for test roi (002)
img.foxp3 <- plot_grid(plotlist=append(img.foxp3[grepl("20220520_TsH_th152_cisi1_002",
names(img.foxp3))],
img.foxp3[grepl("legend",
names(img.foxp3))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=15, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
img.foxp3
Expression values for cells in test image for second worst performing protein CD303.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi2 <- "CD303"
img.cd303 <- plot_cells(X.list, masks, poi2)
# Plot decomposed vs true results for test roi (002)
img.cd303 <- plot_grid(plotlist=append(img.cd303[grepl("20220520_TsH_th152_cisi1_002",
names(img.cd303))],
img.cd303[grepl("legend",
names(img.cd303))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=15, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
img.cd303
# Have a look at dittoRidgePlots of a bad performing protein
X.list <- lapply(X.list, function(sce.temp) {
colData(sce.temp)$dummy <- "1"
sce.temp
})
plot.list <- NULL
for(sce in X.list){
plot.list <- append(plot.list, list(dittoRidgePlot(sce, var="CD303",
group.by="dummy", assay="exprs") +
ggtitle(metadata(sce)$ground_truth) +
coord_cartesian(xlim=c(0, 6.2), expand=TRUE)))
}
print(plot_grid(plotlist=plot.list, ncol=1))
u.plot <- plot_single_U(u %>%
dplyr::select(module, membership, protein),
paste0("U", length(unique(u$module))))
print(u.plot)
a.plot <- plot_single_A(a %>%
dplyr::select(-c("dataset", "training", "k")), "A")
print(a.plot)